* build_var_ocupados.do
* Prepares the ocupados data for the regression analysis by modifying existing variables and creating new ones
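* Inputs : /Data/Source/ocupados.dta, /Data/Clean/Precios/precios98_c.dta, /Data/Clean/Precios/precios98_ec.dta, /Data/Clean/Precios/precios98_p.dta, /Data/Clean/MW/serie_salario_minimo.dta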
* Outputs: /Data/Source/ocupados_mw.dta

* 1. Keep only the cities that appear in all the quarters and known occupations
* 2. "Fix" the weights
* 2.1 Calculate employment rates : Occupied over labor force
* 3. Keep only formal employees (excluding the self-employed and family workers) and create nominal wage and income variables
* 4. Merge GDP by departamento - Later
* 5. Merge IPC information to calculate real variables
* 6. Calculate real variables. Base = Bogotá, first quarter (trimestre 1) of 1996
* 7. Merge minimum wage and calculate real minimum wages, leads and lags
* 8. Calculate Bartik price variables
* 9. Calculate city specific trends.

*************************** Change log ********************************** 

/*
	01/23/15	JEP		Drop 2001 observation generated when MW is merged
	01/24/15	JEP		Add calculation of mw leads and lags, city specific trends, take these out of build mw measures.
	04/5/2015	JEP		Add calculation of Bartik variables excluding agriculture, mining, public services
*/


clear all
version 13.1

* Project info
* Resolves the project root (local master) and records in local pr whether
* this do-file runs inside a -project- build (pr = 1) or stand-alone (pr = 0).
cap project, doinfo
* Stand-alone cases that fall back to the hard-coded roots below:
*   198 - the code this script has always treated as "not inside a build"
*   199 - Stata's "unrecognized command", i.e. -project- is not installed
* Any other return code still goes to the project branch and fails loudly there.
if inlist(_rc, 198, 199) {
	if c(os)=="Unix" loc master "/home/jperez/AA_Minimum_Wage"
	else if c(username)=="J16339" loc master "B:\Col_Minimum_Wage"
	else loc master "C:/Users/jorpp/Dropbox (Brown)/Col_Minimum_Wage"
	loc pr = 0
}
else {
	* Inside a build: take the root and the do-file name from -project-
	local master "`r(pdir)'"
	local doname "`r(dofile)'"
	loc pr = 1
	* Project calls: declare every input file as a dependency of this do-file
	project, uses("`master'/Data/Source/ocupados.dta")
	project, uses("`master'/Data/Clean/Precios/precios98_c.dta")
	project, uses("`master'/Data/Clean/Precios/precios98_ec.dta")
	project, uses("`master'/Data/Clean/Precios/precios98_p.dta")
	project, uses("`master'/Data/Clean/MW/serie_salario_minimo.dta")
}



* Load the worker-level source data (memory was emptied by -clear all- above)
use "`master'/Data/Source/ocupados.dta"



* cd "C:/Users/JorgeEduardo/Dropbox/AA_Minimum_Wage"
* use "Data/Source/ocupados.dta", clear

***** 1. Restrict the sample to cities present in every quarter and to known occupations

/* Disabled loop: for every area and period, drops the area when it has no
   observations in that period.
levelsof area, local(areas)
levelsof time, local(times)
foreach area in `areas' {
	foreach time in `times'	{
		cap assert area!=`area' if time==`time'
		if !_rc drop if area==`area'
	}
} */
* Final list of areas. Area 6 (Valle de Aburrá) is deliberately left out
* because it has very few observations.
keep if inlist(area, 5, 8, 11, 17, 52, 68, 76)

* Discard records whose occupation code is 0 (unknown occupation)
keep if cocupacion != 0

***** 2. "Fix" the weights
* Heuristic rescaling that follows the usual practice for the monthly ECH.
* The ocupados files are representative on their own, and fex as delivered
* produces quarterly totals.
* Yearly totals: one quarter of the quarterly weight
generate fexy = fex / 4
* Sample weight: one fifth of the yearly weight
generate fexs = fexy / 5

***** 2.1 Calculate employment rates by city and time
* Notice these are calculated over the labor force.
* The city-time table is built on a preserved copy of the data, written to a
* temporary file and merged straight back, so the values merged are always the
* ones computed in this run rather than whatever empleo.dta happens to be on disk.
tempfile empleo
preserve
* Weighted (iweight fex) count of records per ocupado status, area and period
collapse (count) tipo_registro [iw=fex], by(ocupado area time)
ren tipo_registro num
* One row per area-period, one num column per ocupado value
reshape wide num, i(area time) j(ocupado)
* NOTE(review): presumably ocupado==3 is employed and ocupado==1 is unemployed,
* which makes emp the employed share of the labor force - confirm with the codebook
gen emp=num3/(num1+num3)
gen empcount=num3
* NOTE(review): lfcount holds num1 only, while the denominator of emp is
* num1+num3 - confirm which of the two is meant by "labor force count"
gen lfcount=num1
keep emp empcount lfcount area time
* Uncomment to also publish the table as a permanent dataset
* save "`master'/Data/Source/empleo.dta", replace
save "`empleo'"
restore
* Attach the area-period rates to every worker record
merge m:1 area time using "`empleo'", nogenerate

* Also calculate by industry - time
* NOTE(review): the grouping variable used below is tipo_trabajador, not an
* industry code - confirm that this is the intended dimension.
* NOTE(review): the preserve on the next line is disabled, so the collapse
* replaces the worker-level data in memory (including the variables created
* above) with the aggregated table - confirm this is intended.
* preserve 
* Weighted (iweight fex) count of records per ocupado status, worker type and period
collapse (count) tipo_registro [iw=fex], by(ocupado tipo_trabajador time)
ren tipo_registro num
* One row per worker type and period, one num column per ocupado value
reshape wide num, i(tipo_trabajador time) j(ocupado)
* Count for ocupado==3 expressed in thousands
gen empind=num3/1000
keep empind tipo_trabajador time
